Looking at whiff data for left-handed pitchers (35 pitchers from 2022, all with 1842+ seasonal pitches).
library(tidyverse)
library(readxl)
library(knitr)
library(ranger)
library(glmnet)
library(forcats)
library(olsrr)
library(Metrics)
library(mgcv)
library(caret)
# Geom Zone (Jackie's) ####
geom_zone <- function(top = 11/3, bottom = 3/2, linecolor = "black"){
geom_rect(xmin = -.7083, xmax = .7083, ymin = bottom, ymax = top,
alpha = 0, color = linecolor, linewidth = 0.75)
}
# c(0, 0, -.25, -.5, -.25))
geom_plate <- function(pov = "pitcher"){
df <- case_when(
pov == "pitcher" ~
data.frame(x = c(-.7083, .7083, .7083 ,0, -.7083), y = c(0, 0, .25, .5, .25)),
pov == "catcher" ~
data.frame(x = c(-.7083, .7083, .7083 ,0, -.7083), y = c(0, 0, -.25, -.5, -.25))
)
g <- geom_polygon(data = df, aes(x = x, y = y), fill = "white", color = "black", linewidth = 1.25)
g
}
all <- read_csv("CSVs/all_pitches.csv") %>%
select(-...1) %>%
mutate(wOBAr = case_when(
woba >= 0.370 ~ 6,
woba >= 0.340 ~ 5,
woba >= 0.310 ~ 4,
woba >= 0.280 ~ 3,
woba >= 0.250 ~ 2,
woba >= 0.220 ~ 1)) %>%
mutate(wOBAr = as.factor(wOBAr))
whiff <- all %>%
mutate(whiff = description == "swinging_strike",
whiff = as.character(whiff)) %>%
filter(pitch_type != "NA",
pitch_type != "PO")
# Pitch Speed
whiff %>%
ggplot(aes(y = whiff, x = pitch_speed, color = pitch_name)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~pitch_name) +
labs(title = "Whiff vs. Non-Whiff by Pitch Speed",
x = "Pitch Speed (mph)",
y = "Outcome",
color = "Pitch Type") +
NULL
# Horizontal Movement
whiff %>%
ggplot(aes(y = whiff, x = pfx_x*12, color = pitch_name)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~pitch_name) +
labs(title = "Whiff vs. Non-Whiff by Horizontal Movement",
x = "Horizontal Movement (in.)",
y = "Outcome",
color = "Pitch Type") +
NULL
# Vertical Movement
whiff %>%
ggplot(aes(y = whiff, x = pfx_z*12, color = pitch_name)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~pitch_name) +
labs(title = "Whiff vs. Non-Whiff by Vertical Movement",
x = "Induced Vertical Movement (in.)",
y = "Outcome",
color = "Pitch Type") +
NULL
# Horizontal Pitch Location
whiff %>%
ggplot(aes(y = whiff, x = plate_x, color = pitch_name)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~pitch_name) +
labs(title = "Whiff vs. Non-Whiff by Horizontal Pitch Location",
x = "Horizontal Pitch Location (ft)",
y = "Outcome",
color = "Pitch Type") +
NULL
# Vertical Pitch Location
whiff %>%
ggplot(aes(y = whiff, x = plate_z, color = pitch_name)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~pitch_name) +
labs(title = "Whiff vs. Non-Whiff by Vertical Pitch Location",
x = "Vertical Pitch Location (ft)",
y = "Outcome",
color = "Pitch Type") +
NULL
# Spin Rate
whiff %>%
ggplot(aes(y = whiff, x = release_spin_rate, color = pitch_name)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~pitch_name) +
labs(title = "Whiff vs. Non-Whiff by Spin Rate",
x = "Spin Rate (rpm)",
y = "Outcome",
color = "Pitch Type") +
NULL
# Slider Logistic Model
whiff_sl <- whiff %>%
filter(pitch_type == "SL") %>%
mutate(whiff = str_replace(whiff, "TRUE", "1"),
whiff = str_replace(whiff, "FALSE", "0"),
whiff = as.numeric(whiff))
# Original Model
model1 <- glm(whiff ~ pitch_speed + spin_axis + pfx_x + pfx_z + plate_x + plate_z +
release_spin_rate + speed_change + break_change + pfx_total + distance,
data = whiff_sl, family = binomial)
# Reduced Model
model1 <- glm(whiff ~ pitch_speed + plate_x + plate_z +
release_spin_rate + speed_change + break_change + pfx_total + distance,
data = whiff_sl)
summary(model1)
##
## Call:
## glm(formula = whiff ~ pitch_speed + plate_x + plate_z + release_spin_rate +
## speed_change + break_change + pfx_total + distance, data = whiff_sl)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.474e-02 5.943e-02 -0.248 0.804098
## pitch_speed 2.416e-03 6.789e-04 3.559 0.000373 ***
## plate_x 1.720e-02 1.959e-03 8.782 < 2e-16 ***
## plate_z 3.157e-02 2.477e-03 12.747 < 2e-16 ***
## release_spin_rate 2.714e-05 7.244e-06 3.746 0.000180 ***
## speed_change 8.478e-03 1.604e-03 5.285 1.26e-07 ***
## break_change 4.843e-02 1.230e-02 3.937 8.25e-05 ***
## pfx_total -2.533e-02 6.572e-03 -3.854 0.000116 ***
## distance -1.078e-01 3.328e-03 -32.395 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.121298)
##
## Null deviance: 5646.6 on 45219 degrees of freedom
## Residual deviance: 5484.0 on 45211 degrees of freedom
## (126 observations deleted due to missingness)
## AIC: 32948
##
## Number of Fisher Scoring iterations: 2
preds <- whiff_sl %>%
mutate(prediction_log = predict(model1, whiff_sl),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
preds %>%
ggplot(aes(x = as.character(whiff), y = prediction)) +
geom_boxplot() +
geom_jitter(alpha = 0.1, width = 0.1, height = 0)
preds %>%
mutate(prediction = round(prediction, 2)) %>%
group_by(prediction) %>%
summarize(mean(whiff)) %>%
as.data.frame() %>%
ggplot(aes(x = prediction, y = `mean(whiff)`)) +
geom_point() +
geom_smooth(se = FALSE) +
labs(y = "observed whiff proportion",
x = "projected % whiff chance",
title = "Whiff proportion by predicted whiff value",
subtitle = "Whiff predictions have a 1% bin width")
whiff %>%
mutate(count = paste0(balls, "-", strikes)) %>%
filter(pitch_type == "SL") %>%
ggplot(aes(y = whiff, x = pfx_z*12)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~count) +
labs(title = "Whiff vs. Non-Whiff by Vertical Movement",
x = "Induced Vertical Movement (in.)",
y = "Outcome") +
NULL
# Sliders
whiff %>%
filter(pitch_type =="SL") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(hitter), rows = vars(whiff)) +
theme_bw()
# Fastballs
whiff %>%
filter(pitch_type =="FF") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(hitter), rows = vars(whiff)) +
theme_bw()
# Change-Ups
whiff %>%
filter(pitch_type =="CH") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(hitter), rows = vars(whiff)) +
theme_bw()
whiff %>%
arrange(game_date, player_name, at_bat_number, pitch_number) %>%
mutate(prev_pitch = lag(pitch_type, n = 1, default = NA)) %>%
filter(hitter == "R",
pitch_type == "SL",
prev_pitch %in% c("FF", "CH", "SL", "CU"),
player_name == "Fried, Max") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
coord_fixed() +
facet_grid(cols = vars(prev_pitch), rows = vars(whiff)) +
theme_bw()
whiff %>%
arrange(game_date, player_name, at_bat_number, pitch_number) %>%
mutate(prev_pitch = lag(pitch_type, n = 1, default = NA)) %>%
filter(hitter == "R",
pitch_type == "FF") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
coord_fixed() +
facet_grid(cols = vars(wOBAr), rows = vars(whiff)) +
theme_bw()
zoned <- whiff %>%
mutate(loc_x = round(plate_x*3, 0),
loc_y = round(plate_z*3, 0))
zoned %>%
filter(pitch_type == "FF",
plate_z > 0 & plate_z < 6,
plate_x > -1.5 & plate_x < 1.5) %>%
summarize(whiff_perc = mean(whiff == "TRUE"),
pitches = n(),
.by = c(loc_x, loc_y, wOBAr)) %>%
filter(pitches >= 10) %>%
ggplot(aes(x = -loc_x, y = loc_y, fill = whiff_perc)) +
geom_tile() +
scale_fill_gradient(low = "gray", high = "red") +
facet_wrap(~ wOBAr) +
coord_fixed() +
theme_bw()
all %>%
ggplot(aes(x = wOBAr, pitch_speed, color = wOBAr)) +
geom_boxplot() +
facet_wrap(~ pitch_name)
all %>%
ggplot(aes(x = estimated_woba_using_speedangle, pitch_speed)) +
geom_point(alpha = 0.2) +
facet_wrap(~ pitch_name)